Introduction

In this report, I will take you through a re-analysis methylation data first described by Choufani et al (2019).

In their study, they analysed the DNA methylation patterns of 44 placenta samples conceived naturally (NAT) and 44 placenta samples conceivced through artificial reproductive technologies (ART) (23 in vitro, 21 in vivo).

The platform used in the study is the Illumina Infinium HumanMethylation450k BeadChip assay.

The methylation data have been deposited to NCBI GEO repository accession number GSE120250.

Loading packages

These packackes will help us to perform vital steps such as normalisation, filtering, differential analysis, etc, and provide information about the array probe annotaions.

suppressPackageStartupMessages({
    library("R.utils")
    library("missMethyl")
    library("limma")
    library("topconfects")
    library("minfi")
    library("IlluminaHumanMethylation450kmanifest")
    library("MethylToSNP")
    library("RColorBrewer")
    library("IlluminaHumanMethylation450kanno.ilmn12.hg19")
    library("eulerr")
    library("plyr")
    library("gplots")
    library("reshape2")
    library("beeswarm")
    library("GEOquery")
    library("readxl")
    
  })
# Annotation
ann450k <- getAnnotation(IlluminaHumanMethylation450kanno.ilmn12.hg19)
myann <- data.frame(ann450k[,c("UCSC_RefGene_Name","Regulatory_Feature_Group")])
promoters <- grep("Prom",myann$Regulatory_Feature_Group)

Loading functions

These functions provide shortcuts to help with charts and other analysis. They will eventually be shoved into another Rscript or package but can stay here for now.

# scree plot shows the amount of variation in a dataset that is accounted
# for by the first N principal components
myscree <- function(mx,n=10,main="") {
  pc <- princomp(mx)$sdev
  pcp <- pc/sum(pc)*100
  pcp <- pcp[1:10]
  barplot(pcp,cex.names = 1,las=2,ylim=c(0,60),
      ylab="percent (%) variance explained", main=main)
  text((0.5:length(pcp)*1.2),pcp,label=signif(pcp,3),pos=3,cex=0.8)
}


# Here is a function to make a volcano plot
make_volcano <- function(dm,name,mx) {
    sig <- subset(dm,adj.P.Val<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,logFC>0))
    N_DN=nrow(subset(sig,logFC<0))
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn")
    plot(dm$logFC,-log10(dm$P.Val),cex=0.5,pch=19,col="darkgray",
        main=name, xlab="log FC", ylab="-log10 pval")
    mtext(HEADER)
    grid()
    points(sig$logFC,-log10(sig$P.Val),cex=0.5,pch=19,col="red")
}

# Here is a function to make heatmaps 
make_heatmap <- function(dm,name,mx,n) {
  topgenes <-  rownames(head(dm[order(dm$P.Value),],n))
  ss <- mx[which(rownames(mx) %in% topgenes),]
  my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
  heatmap.2(ss,scale="row",margin=c(10, 10),cexRow=0.4,trace="none",cexCol=0.4,
      col=my_palette, main=name)
}

# make beeswarm charts
# dm = a limma differential meth object
# name = character name of the limma dm object
# mx = matrix of normalised data
# groups = a vector of factors corresponding to the cols in mx
# n = the number of top significant genes to plot (default = 15) 
make_beeswarms <- function(dm,name,mx,groups,n=15) {
    par(mar=c(3,3,1,1))
    NCOLS=5
    NROWS=floor(n/NCOLS)
    if (n %% NCOLS > 0) { NROWS <- NROWS + 1 }
    par(mfrow=c(NROWS, NCOLS))
    topgenes <-  rownames(head(dm[order(dm$P.Value),],n))
    ss <- mx[which(rownames(mx) %in% topgenes),]
    n <- 1:n
    g1name=levels(groups)[1]
    g2name=levels(groups)[2]
    g1dat <- ss[n,which(groups == g1name)]
    g2dat <- ss[n,which(groups == g2name)]
    g1l <-lapply(split(g1dat, row.names(g1dat)), unlist)
    g2l <-lapply(split(g2dat, row.names(g2dat)), unlist)

    for (i in n) {
      mydat <- list(g1l[[i]],g2l[[i]])
        beeswarm(mydat,ylim=c(0,1),cex=0.2, pch=19,
        las=2, cex.lab=0.6, main=names( g1l )[i] , 
        ylab="",labels = c(g1name,g2name))
      grid()
    }
}

# make beeswarm charts for best confects
# dm = a limma differential meth object
# name = character name of the limma dm object
# mx = matrix of normalised data
# groups = a vector of factors corresponding to the cols in mx
# n = the number of top significant genes to plot (default = 15) 
make_beeswarms_confects <- function(confects,name,mx,groups,n=15) {
    par(mar=c(3,3,1,1))
    NCOLS=5
    NROWS=floor(n/NCOLS)
    if (n %% NCOLS > 0) { NROWS <- NROWS + 1 }
    par(mfrow=c(NROWS, NCOLS))
    topgenes <-  head(confects$table,n)$name
    ss <- mx[which(rownames(mx) %in% topgenes),]
    n <- 1:n
    g1name=levels(groups)[1]
    g2name=levels(groups)[2]
    g1dat <- ss[n,which(groups == g1name)]
    g2dat <- ss[n,which(groups == g2name)]
    g1l <-lapply(split(g1dat, row.names(g1dat)), unlist)
    g2l <-lapply(split(g2dat, row.names(g2dat)), unlist)

    for (i in n) {
      mydat <- list(g1l[[i]],g2l[[i]])
        beeswarm(mydat,ylim=c(0,1),cex=0.2, pch=19,
        las=2, cex.lab=0.6, main=names( g1l )[i] , 
        ylab="",labels = c(g1name,g2name))
      grid()
    }
}

# this is a wrapper which creates three charts
# We will be adding more
make_dm_plots <- function(dm,name,mx,groups=groups,confects=confects) {
    make_volcano(dm,name,mx)
    make_beeswarms(dm ,name , mx , groups , n= 15)
    make_heatmap(dm , name , mx ,n = 50)
    make_beeswarms_confects(confects, name, mx, groups, n=15)
}  

# this is a function which will perform differential methylation analysis
# if you provide it with the right inputs
dm_analysis <- function(samplesheet,sex,groups,mx,name,myann,beta) {

    design <- model.matrix(~ sex + groups)
    mxs <- mx[,which( colnames(mx) %in% samplesheet$Basename )]
    fit.reduced <- lmFit(mxs,design)
    fit.reduced <- eBayes(fit.reduced)
    summary(decideTests(fit.reduced))
    dm <- topTable(fit.reduced,coef=3, number = Inf)
    dma <- merge(myann,dm,by=0)
    dma <- dma[order(dma$P.Value),]
    dm_up <- rownames(subset(dm,adj.P.Val<0.05 & logFC>0))
    dm_dn <- rownames(subset(dm,adj.P.Val<0.05 & logFC<0))
    length(dm_up)
    length(dm_dn)
    confects <- limma_confects(fit.reduced, coef=3, fdr=0.05)
    make_dm_plots(dm = dm ,name=name , mx=beta, groups= groups, confects=confects)
    dat <- list("dma"=dma, "dm_up"=dm_up, "dm_dn"=dm_dn, "confects"=confects)

}

Data Import

Importing Data from GEO. Raw IDAT files and accompanying Series Matrix publicly available on GEO.

#Create Dirtectory
dir.create("GSE120250")
## Warning in dir.create("GSE120250"): 'GSE120250' already exists
ARRAY_SAMPLESHEET="GSE120250/GSE120250_series_matrix.txt.gz"
# only download it if it is not present on the system
if ( !file.exists(ARRAY_SAMPLESHEET ) ) {
    DLFILE=paste(ARRAY_SAMPLESHEET,".gz",sep="")
    download.file("https://ftp.ncbi.nlm.nih.gov/geo/series/GSE120nnn/GSE120250/matrix/GSE120250_series_matrix.txt.gz",
        destfile = DLFILE)
    gunzip(DLFILE)
}

# Reading in GEO series matrix
gse <- getGEO(filename="/mnt/mnorris/ART_methylation/GSE120250/GSE120250_series_matrix.txt.gz")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   ID_REF = col_character()
## )
## See spec(...) for full column specifications.
## File stored at:
## /tmp/RtmpI4KgiP/GPL13534.soft
## Warning: 65 parsing failures.
##    row     col           expected     actual         file
## 485513 SPOT_ID 1/0/T/F/TRUE/FALSE rs10796216 literal data
## 485514 SPOT_ID 1/0/T/F/TRUE/FALSE rs715359   literal data
## 485515 SPOT_ID 1/0/T/F/TRUE/FALSE rs1040870  literal data
## 485516 SPOT_ID 1/0/T/F/TRUE/FALSE rs10936224 literal data
## 485517 SPOT_ID 1/0/T/F/TRUE/FALSE rs213028   literal data
## ...... ....... .................. .......... ............
## See problems(...) for more details.
ARRAY_DATA="GSE120250/GSE120250_RAW.tar"
# only download it if it is not present on the system
if ( !dir.exists("GSE120250/IDAT") ) {
  dir.create("GSE120250/IDAT")
  download.file("https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE120250&format=file",
    destfile = ARRAY_DATA)
    untar(exdir = "GSE120250/IDAT", tarfile = "GSE120250/GSE120250_RAW.tar",)
}

baseDir <- "./GSE120250"
targets <- pData(phenoData(gse))

mybase <- unique(gsub("_Red.idat.gz" ,"", gsub("_Grn.idat.gz", "" ,list.files("./GSE120250",pattern = "GSM",recursive = TRUE))))
targets$Basename <- paste("GSE120250/", mybase, sep = "")

head(targets$Basename)
## [1] "GSE120250/IDAT/GSM3396785_9992576119_R02C01"
## [2] "GSE120250/IDAT/GSM3396786_9992576022_R02C02"
## [3] "GSE120250/IDAT/GSM3396787_9992576033_R01C02"
## [4] "GSE120250/IDAT/GSM3396788_9992576022_R04C02"
## [5] "GSE120250/IDAT/GSM3396789_9992576207_R03C02"
## [6] "GSE120250/IDAT/GSM3396790_9992576022_R05C02"
rgSet <- read.metharray.exp(targets = targets)
mSet <- preprocessRaw(rgSet)
mSetSw <- SWAN(mSet,verbose=FALSE)
## [SWAN] Preparing normalization subset
## 450k
## [SWAN] Normalizing methylated channel
## [SWAN] Normalizing unmethylated channel
par(mfrow=c(1,2))
densityByProbeType(mSet[,1], main = "Raw")
densityByProbeType(mSetSw[,1], main = "SWAN")
Figure 1. Normalisation of bead-array data with SWAN.

Figure 1. Normalisation of bead-array data with SWAN.

Filter probes

Here we are running parallel analyses, both including and excluding sex chromosomes.

# include sex chromosomes
detP <- detectionP(rgSet)
keep <- rowSums(detP < 0.01) == ncol(rgSet)
mSetSw <- mSetSw[keep,]

# exclude SNP probes
#mSetSw <- mapToGenome(mSetSw)
#mSetSw_nosnp <- dropLociWithSnps(mSetSw)
#dim(mSetSw)
#dim(mSetSw_nosnp)
#mSetSw <- mSetSw_nosnp

# exclude sex chromosomes
keep <- !(featureNames(mSetSw) %in% ann450k$Name[ann450k$chr %in% c("chrX","chrY")])
mSetFlt <- mSetSw[keep,]
head(mSetFlt)
## class: MethylSet 
## dim: 6 88 
## metadata(0):
## assays(2): Meth Unmeth
## rownames(6): cg00000957 cg00001349 ... cg00002719 cg00002837
## rowData names(0):
## colnames(88): GSM3396785_9992576119_R02C01 GSM3396786_9992576022_R02C02
##   ... GSM3396871_9992576119_R05C01 GSM3396872_9992576207_R02C02
## colData names(39): title geo_accession ... Basename filenames
## Annotation
##   array: IlluminaHumanMethylation450k
##   annotation: ilmn12.hg19
## Preprocessing
##   Method: SWAN (based on a MethylSet preprocessed as 'Raw (no normalization or bg correction)')
##   minfi version: 1.32.0
##   Manifest version: 0.4.0
dim(mSetFlt)
## [1] 468596     88

Extracting Beta and M-values

# include sex chromosomes
meth <- getMeth(mSetSw)
unmeth <- getUnmeth(mSetSw)
Mval <- log2((meth + 100)/(unmeth + 100))
beta <- getBeta(mSetSw)

# exclude sex chromosomes
meth <- getMeth(mSetFlt)
unmeth <- getUnmeth(mSetFlt)
Mval_flt <- log2((meth + 100)/(unmeth + 100))
beta_flt <- getBeta(mSetFlt)

Remove probes that appear to overlap SNP

DOI: https://doi.org/10.1186/s13072-019-0321-6

#testing
#test <- MethylToSNP(beta, gap.ratio = 0.5, gap.sum.ratio = 0.4, SNP=SNPs.147CommonSingle #,verbose=FALSE)
#nrow(test)
# include sex chromosomes
# evidence of SNP
#snp_probe_dat <- MethylToSNP(beta, gap.ratio = 0.5, gap.sum.ratio = 0.4,
#    SNP=SNPs.147CommonSingle ,verbose=FALSE)
#dim(snp_probe_dat)
#head(snp_probe_dat)
#snp_probes <- rownames(snp_probe_dat)
# >50% confidence of SNP
#nrow(snp_probe_dat[which(snp_probe_dat$confidence>0.5),])
# remove SNP probes
#Mval <- Mval[which(!rownames(Mval) %in% snp_probes),]
#dim(Mval)
# evidence of SNP
#snp_probe_dat <- MethylToSNP(beta_flt, SNP=SNPs.147CommonSingle ,verbose=FALSE)
#dim(snp_probe_dat)
#head(snp_probe_dat)
#snp_probes <- rownames(snp_probe_dat)
# >50% confidence of SNP
#nrow(snp_probe_dat[which(snp_probe_dat$confidence>0.5),])
# remove SNP probes
#Mval_flt <- Mval_flt[which(!rownames(Mval_flt) %in% snp_probes),]
#dim(Mval_flt)

MDS analysis

[Multidimensional scaling(https://en.wikipedia.org/wiki/Multidimensional_scaling) plot is a method used to identify the major sources of variation in a dataset. In the MDS plots below, I will be plotting the first two dimensions (principal components [PCs]), with each sample label coloured either by ART classification, sex, ART and sex, and then array chip and then sample plate.

We wil begin with MDS analysis including the sex chromosomes and then exclude them.

First, let’s quantify the contribution of the major principal components. with a scree plot, we can see whether most of the variation is captured in the first two PCs or whether it is spread over more PCs.

Scree Plot

par(mfrow=c(2,1))  
myscree(Mval,main="incl sex chr")
myscree(Mval_flt,main="excl sex chr")
Figure 2. Screeplot shows contribution of top principal components to the overall variation in the experiment.

Figure 2. Screeplot shows contribution of top principal components to the overall variation in the experiment.

MDS by ART

sample_groups <- factor(targets$characteristics_ch1.2)
head(sample_groups)                        
##                      V2                      V3                      V4 
## art treatment: in vitro  art treatment: in vivo art treatment: in vitro 
##                      V5                      V6                      V7 
##  art treatment: in vivo art treatment: in vitro art treatment: in vitro 
## Levels: art treatment: in vitro art treatment: in vivo art treatment: NA
colour_palette=brewer.pal(n = length(levels(sample_groups)), name = "Paired")
colours <- colour_palette[as.integer(factor(targets$art))]
plot(1,axes = FALSE,xlab="",ylab="",main="MDS by ART type")
legend("center",legend=levels(sample_groups),pch=16,cex=1.2,col=colour_palette)
Figure 3. MDS plot coloured by ART classification.

Figure 3. MDS plot coloured by ART classification.

mydist <- plotMDS(Mval, labels=rownames(targets),col=colours,main="sex chromosomes included")
Figure 3. MDS plot coloured by ART classification.

Figure 3. MDS plot coloured by ART classification.

mydist_flt <- plotMDS(Mval_flt, labels=rownames(targets),col=colours,main="sex chromosomes excluded")
Figure 3. MDS plot coloured by ART classification.

Figure 3. MDS plot coloured by ART classification.

MDS by Sex

sample_groups <- factor(targets$characteristics_ch1.1)
colour_palette=brewer.pal(n = length(levels(sample_groups)), name = "Paired")
## Warning in brewer.pal(n = length(levels(sample_groups)), name = "Paired"): minimal value for n is 3, returning requested palette with 3 different levels
colours <- colour_palette[as.integer(factor(targets$characteristics_ch1.1))]
plot(1,axes = FALSE,xlab="",ylab="",main="MDS by sex")
legend("center",legend=levels(sample_groups),pch=16,cex=1.2,col=colour_palette)
Figure 4. MDS plot coloured by sex.

Figure 4. MDS plot coloured by sex.

plotMDS(mydist, labels=rownames(targets),col=colours,main="sex chromosomes included")
Figure 4. MDS plot coloured by sex.

Figure 4. MDS plot coloured by sex.

plotMDS(mydist_flt, labels=rownames(targets) ,col=colours,main="sex chromosomes excluded")
Figure 4. MDS plot coloured by sex.

Figure 4. MDS plot coloured by sex.

MDS by ART and sex

targets$ARTgender <- paste(targets$`art treatment:ch1`,targets$`gender:ch1`, sep=("_"))

targets$ARTgender
##  [1] "in vitro_M" "in vivo_M"  "in vitro_M" "in vivo_F"  "in vitro_M"
##  [6] "in vitro_F" "in vitro_F" "in vivo_F"  "in vivo_F"  "in vitro_F"
## [11] "in vitro_F" "in vitro_F" "in vivo_F"  "in vitro_F" "in vivo_M" 
## [16] "in vitro_F" "in vitro_M" "in vitro_M" "in vivo_M"  "in vitro_M"
## [21] "in vitro_M" "in vitro_M" "in vivo_F"  "in vitro_F" "in vivo_F" 
## [26] "in vivo_F"  "in vivo_M"  "in vitro_M" "in vitro_M" "in vivo_M" 
## [31] "in vitro_F" "in vitro_F" "in vivo_M"  "in vivo_M"  "in vitro_M"
## [36] "in vivo_M"  "in vivo_F"  "in vivo_M"  "in vitro_F" "in vitro_M"
## [41] "in vivo_M"  "in vivo_F"  "in vivo_M"  "in vivo_M"  "NA_F"      
## [46] "NA_F"       "NA_M"       "NA_F"       "NA_M"       "NA_F"      
## [51] "NA_F"       "NA_M"       "NA_F"       "NA_M"       "NA_M"      
## [56] "NA_M"       "NA_M"       "NA_F"       "NA_F"       "NA_M"      
## [61] "NA_F"       "NA_F"       "NA_M"       "NA_M"       "NA_F"      
## [66] "NA_F"       "NA_M"       "NA_F"       "NA_M"       "NA_M"      
## [71] "NA_F"       "NA_M"       "NA_M"       "NA_M"       "NA_M"      
## [76] "NA_F"       "NA_F"       "NA_F"       "NA_M"       "NA_M"      
## [81] "NA_F"       "NA_M"       "NA_F"       "NA_F"       "NA_M"      
## [86] "NA_F"       "NA_M"       "NA_M"
sample_groups <- factor(targets$ARTgender)
sample_groups
##  [1] in vitro_M in vivo_M  in vitro_M in vivo_F  in vitro_M in vitro_F
##  [7] in vitro_F in vivo_F  in vivo_F  in vitro_F in vitro_F in vitro_F
## [13] in vivo_F  in vitro_F in vivo_M  in vitro_F in vitro_M in vitro_M
## [19] in vivo_M  in vitro_M in vitro_M in vitro_M in vivo_F  in vitro_F
## [25] in vivo_F  in vivo_F  in vivo_M  in vitro_M in vitro_M in vivo_M 
## [31] in vitro_F in vitro_F in vivo_M  in vivo_M  in vitro_M in vivo_M 
## [37] in vivo_F  in vivo_M  in vitro_F in vitro_M in vivo_M  in vivo_F 
## [43] in vivo_M  in vivo_M  NA_F       NA_F       NA_M       NA_F      
## [49] NA_M       NA_F       NA_F       NA_M       NA_F       NA_M      
## [55] NA_M       NA_M       NA_M       NA_F       NA_F       NA_M      
## [61] NA_F       NA_F       NA_M       NA_M       NA_F       NA_F      
## [67] NA_M       NA_F       NA_M       NA_M       NA_F       NA_M      
## [73] NA_M       NA_M       NA_M       NA_F       NA_F       NA_F      
## [79] NA_M       NA_M       NA_F       NA_M       NA_F       NA_F      
## [85] NA_M       NA_F       NA_M       NA_M      
## Levels: in vitro_F in vitro_M in vivo_F in vivo_M NA_F NA_M
colour_palette=brewer.pal(n = length(levels(sample_groups)), name = "Paired")
colours <- colour_palette[as.integer(factor(targets$ARTgender))]
plot(1,axes = FALSE,xlab="",ylab="",main="MDS by ART and sex")
legend("center",legend=levels(sample_groups),pch=16,cex=1.2,col=colour_palette)
Figure 5. MDS plot coloured by ART classification and sex.

Figure 5. MDS plot coloured by ART classification and sex.

plotMDS(mydist, labels=rownames(targets), col=colours,main="sex chromosomes included")
Figure 5. MDS plot coloured by ART classification and sex.

Figure 5. MDS plot coloured by ART classification and sex.

plotMDS(mydist_flt, labels=rownames(targets), col=colours,main="sex chromosomes excluded")
Figure 5. MDS plot coloured by ART classification and sex.

Figure 5. MDS plot coloured by ART classification and sex.

Differential analysis

There are a few differential contrasts that would be of interest to us in this study:

The differential analysis is centred around limma to identify differentially methylated probes. TopConfects was also run to obtain the probes with the largest confident effect (topconfect). There are five outputs below:

  1. Volcano plot (limma result).

  2. Beeswarm plot (top probes by limma p-value).

  3. Heatmap (top probes by limma p-value).

  4. Beeswarm plot (top probes by topconfect ranking).

  5. Heatmap (top probes by topconfect ranking). (TODO)

Nat vs In vivo

# Include Sex Chromosomes
samplesheet <- subset(targets,`art treatment:ch1`=="NA" | `art treatment:ch1`=="in vivo")
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$`art treatment:ch1`,levels=c("NA","in vivo"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
    mxs <- Mval[,which( colnames(Mval) %in% samplesheet$Basename )]
    fit.reduced <- lmFit(mxs,design)
    fit.reduced <- eBayes(fit.reduced)
    summary(decideTests(fit.reduced))
##        (Intercept)   sexF groupsin vivo
## Down        210046   5869             0
## NotSig       21546 469494        479729
## Up          248137   4366             0
top_NA_vs_invivo_inc <- dm_analysis(samplesheet=samplesheet,
    sex=sex,groups=groups,mx=Mval,name="top_NA_vs_invivo_inc",
    myann=myann ,beta= beta)
Figure 6: Differential analysis: Naturally conceived vs in vivo including sex chromosomes

Figure 6: Differential analysis: Naturally conceived vs in vivo including sex chromosomes

Figure 6: Differential analysis: Naturally conceived vs in vivo including sex chromosomes

Figure 6: Differential analysis: Naturally conceived vs in vivo including sex chromosomes

Figure 6: Differential analysis: Naturally conceived vs in vivo including sex chromosomes

Figure 6: Differential analysis: Naturally conceived vs in vivo including sex chromosomes

Figure 6: Differential analysis: Naturally conceived vs in vivo including sex chromosomes

Figure 6: Differential analysis: Naturally conceived vs in vivo including sex chromosomes

head(top_NA_vs_invivo_inc$dma)
##         Row.names UCSC_RefGene_Name Regulatory_Feature_Group      logFC
## 37792  cg01866330           SLC45A4                          -0.4229012
## 238262 cg13045913   DNAJC17;ZFYVE19      Promoter_Associated  0.5620240
## 313464 cg17328716         LOC650226                           0.5320257
## 189567 cg10071113             TSSC1                          -0.2969455
## 151933 cg07909498                                            -1.2108576
## 41161  cg02030958                                            -0.3464413
##          AveExpr         t      P.Value adj.P.Val             B
## 37792   3.553625 -4.859240 7.570646e-06 0.5252591  0.5177210257
## 238262 -2.541986  4.842400 8.059513e-06 0.5252591  0.4869350987
## 313464 -1.554288  4.781452 1.010040e-05 0.5252591  0.3757144407
## 189567  3.425431 -4.589701 2.038524e-05 0.5252591  0.0280579483
## 151933 -1.831452 -4.574248 2.156042e-05 0.5252591  0.0002065045
## 41161   4.131721 -4.572559 2.169281e-05 0.5252591 -0.0028362871
# Exclude Sex Chromosomes
samplesheet <- subset(targets,`art treatment:ch1`=="NA" | `art treatment:ch1`=="in vivo")
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$`art treatment:ch1`,levels=c("NA","in vivo"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
    mxs <- Mval_flt[,which( colnames(Mval) %in% samplesheet$Basename )]
    fit.reduced <- lmFit(mxs,design)
    fit.reduced <- eBayes(fit.reduced)
    summary(decideTests(fit.reduced))
##        (Intercept)   sexF groupsin vivo
## Down        204127    875             0
## NotSig       21114 467621        468596
## Up          243355    100             0
top_NA_vs_invivo_exc <- dm_analysis(samplesheet=samplesheet,
    sex=sex,groups=groups,mx=Mval_flt,name="top_NA_vs_invivo_exc",
    myann=myann ,beta= beta)
Figure 7: Differential analysis: Naturally conceived vs in vivo excluding sex chromosomes

Figure 7: Differential analysis: Naturally conceived vs in vivo excluding sex chromosomes

Figure 7: Differential analysis: Naturally conceived vs in vivo excluding sex chromosomes

Figure 7: Differential analysis: Naturally conceived vs in vivo excluding sex chromosomes

Figure 7: Differential analysis: Naturally conceived vs in vivo excluding sex chromosomes

Figure 7: Differential analysis: Naturally conceived vs in vivo excluding sex chromosomes

Figure 7: Differential analysis: Naturally conceived vs in vivo excluding sex chromosomes

Figure 7: Differential analysis: Naturally conceived vs in vivo excluding sex chromosomes

head(top_NA_vs_invivo_exc$dma)
##         Row.names UCSC_RefGene_Name Regulatory_Feature_Group      logFC
## 36833  cg01866330           SLC45A4                          -0.4229012
## 232680 cg13045913   DNAJC17;ZFYVE19      Promoter_Associated  0.5620240
## 306258 cg17328716         LOC650226                           0.5320257
## 185055 cg10071113             TSSC1                          -0.2969455
## 40110  cg02030958                                            -0.3464413
## 148234 cg07909498                                            -1.2108576
##          AveExpr         t      P.Value adj.P.Val          B
## 36833   3.553625 -4.860724 7.531265e-06 0.5208319 0.54350196
## 232680 -2.541986  4.843020 8.043362e-06 0.5208319 0.51097288
## 306258 -1.554288  4.782161 1.007688e-05 0.5208319 0.39934829
## 185055  3.425431 -4.592608 2.017638e-05 0.5208319 0.05391657
## 40110   4.131721 -4.574547 2.154247e-05 0.5208319 0.02119920
## 148234 -1.831452 -4.573994 2.158568e-05 0.5208319 0.02019829

Nat vs In vitro

# Include Sex Chromosomes
samplesheet <- subset(targets,`art treatment:ch1`=="NA" | `art treatment:ch1`=="in vitro")
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$`art treatment:ch1`,levels=c("NA","in vitro"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
    mxs <- Mval[,which( colnames(Mval) %in% samplesheet$Basename )]
    fit.reduced <- lmFit(mxs,design)
    fit.reduced <- eBayes(fit.reduced)
    summary(decideTests(fit.reduced))
##        (Intercept)   sexF groupsin vitro
## Down        209654   4936              0
## NotSig       20466 470575         479724
## Up          249609   4218              5
top_NA_vs_invitro_inc <- dm_analysis(samplesheet=samplesheet,
    sex=sex,groups=groups,mx=Mval,name="top_NA_vs_invitro_inc",
    myann=myann ,beta= beta)
Figure 8: Differential analysis: Naturally conceived vs in vitro including sex chromosomes

Figure 8: Differential analysis: Naturally conceived vs in vitro including sex chromosomes

Figure 8: Differential analysis: Naturally conceived vs in vitro including sex chromosomes

Figure 8: Differential analysis: Naturally conceived vs in vitro including sex chromosomes

Figure 8: Differential analysis: Naturally conceived vs in vitro including sex chromosomes

Figure 8: Differential analysis: Naturally conceived vs in vitro including sex chromosomes

Figure 8: Differential analysis: Naturally conceived vs in vitro including sex chromosomes

Figure 8: Differential analysis: Naturally conceived vs in vitro including sex chromosomes

head(top_NA_vs_invitro_inc$dma)
##         Row.names UCSC_RefGene_Name        Regulatory_Feature_Group     logFC
## 183102 cg09685182                                                   1.5353535
## 383766 cg21858485                                                   1.4074114
## 45648  cg02264082                                                   0.9171541
## 309783 cg17104425              PODN Unclassified_Cell_type_specific 0.5873643
## 72953  cg03651292                                                   0.7774220
## 82153  cg04136610          ADAMTS16                                 0.9467504
##           AveExpr        t      P.Value    adj.P.Val        B
## 183102 -2.3053388 7.008182 1.366032e-09 0.0006553252 8.304303
## 383766 -2.0789440 6.468313 1.265799e-08 0.0030362021 6.801015
## 45648  -1.8380561 5.930313 1.122454e-07 0.0179491197 5.303081
## 309783 -2.3314637 5.643580 3.518952e-07 0.0378088757 4.510037
## 72953  -0.6006281 5.614945 3.940649e-07 0.0378088757 4.431179
## 82153  -2.4370975 5.443791 7.721237e-07 0.0604027464 3.961462
# Exclude Sex Chromosomes
samplesheet <- subset(targets,`art treatment:ch1`=="NA" | `art treatment:ch1`=="in vitro")
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$`art treatment:ch1`,levels=c("NA","in vitro"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
    mxs <- Mval_flt[,which( colnames(Mval_flt) %in% samplesheet$Basename )]
    fit.reduced <- lmFit(mxs,design)
    fit.reduced <- eBayes(fit.reduced)
    summary(decideTests(fit.reduced))
##        (Intercept)   sexF groupsin vitro
## Down        203748    531              0
## NotSig       20035 467977         468591
## Up          244813     88              5
top_NA_vs_invitro_exc <- dm_analysis(samplesheet=samplesheet,
    sex=sex,groups=groups,mx=Mval_flt,name="top_NA_vs_invitro_exc",
    myann=myann ,beta= beta)
Figure 9: Differential analysis: Naturally conceived vs in vitro excluding sex chromosomes

Figure 9: Differential analysis: Naturally conceived vs in vitro excluding sex chromosomes

Figure 9: Differential analysis: Naturally conceived vs in vitro excluding sex chromosomes

Figure 9: Differential analysis: Naturally conceived vs in vitro excluding sex chromosomes

Figure 9: Differential analysis: Naturally conceived vs in vitro excluding sex chromosomes

Figure 9: Differential analysis: Naturally conceived vs in vitro excluding sex chromosomes

Figure 9: Differential analysis: Naturally conceived vs in vitro excluding sex chromosomes

Figure 9: Differential analysis: Naturally conceived vs in vitro excluding sex chromosomes

head(top_NA_vs_invitro_exc$dma)
##         Row.names UCSC_RefGene_Name        Regulatory_Feature_Group     logFC
## 178741 cg09685182                                                   1.5353535
## 374878 cg21858485                                                   1.4074114
## 44485  cg02264082                                                   0.9171541
## 302653 cg17104425              PODN Unclassified_Cell_type_specific 0.5873643
## 71190  cg03651292                                                   0.7774220
## 80135  cg04136610          ADAMTS16                                 0.9467504
##           AveExpr        t      P.Value    adj.P.Val        B
## 178741 -2.3053388 7.008114 1.367185e-09 0.0006406572 8.345059
## 374878 -2.0789440 6.468255 1.266667e-08 0.0029677754 6.835851
## 44485  -1.8380561 5.930576 1.121660e-07 0.0175201782 5.333133
## 302653 -2.3314637 5.644565 3.506316e-07 0.0368842100 4.539169
## 71190  -0.6006281 5.615344 3.935609e-07 0.0368842100 4.458403
## 80135  -2.4370975 5.443910 7.719732e-07 0.0584033459 3.986219

Nat vs ART

# Include Sex Chromosomes
samplesheet <- targets
samplesheet$title <- as.character(samplesheet$title)
samplesheet$title <- (sapply(strsplit(samplesheet$title, " "), "[[", 1))
samplesheet$title <- (sapply(strsplit(samplesheet$title, "_"), "[[", 1))
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$title,levels=c("ART","Control"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
    mxs <- Mval[,which( colnames(Mval) %in% samplesheet$Basename )]
    fit.reduced <- lmFit(mxs,design)
    fit.reduced <- eBayes(fit.reduced)
    summary(decideTests(fit.reduced))
##        (Intercept)   sexF groupsControl
## Down        208613   8714             0
## NotSig       19308 466242        479729
## Up          251808   4773             0
top_ART_vs_Control_inc <- dm_analysis(samplesheet=samplesheet,
    sex=sex,groups=groups,mx=Mval,name="top_ART_vs_Natural_inc",
    myann=myann ,beta= beta)
Figure 10: Differential analysis: Naturally conceived vs ART including sex chromosomes

Figure 10: Differential analysis: Naturally conceived vs ART including sex chromosomes

Figure 10: Differential analysis: Naturally conceived vs ART including sex chromosomes

Figure 10: Differential analysis: Naturally conceived vs ART including sex chromosomes

Figure 10: Differential analysis: Naturally conceived vs ART including sex chromosomes

Figure 10: Differential analysis: Naturally conceived vs ART including sex chromosomes

Figure 10: Differential analysis: Naturally conceived vs ART including sex chromosomes

Figure 10: Differential analysis: Naturally conceived vs ART including sex chromosomes

head(top_ART_vs_Control_inc$dma)
##         Row.names UCSC_RefGene_Name Regulatory_Feature_Group      logFC
## 437740 cg25296923                                             0.3538648
## 473617 cg27477884                                            -0.3533839
## 37792  cg01866330           SLC45A4                           0.3440090
## 262670 cg14305701            INPP5A                           0.7627745
## 88939  cg04490037               DDC                           0.2033697
## 279210 cg15240419                                             0.1825872
##          AveExpr         t      P.Value adj.P.Val        B
## 437740 2.1818350  5.027381 2.549229e-06 0.4468383 3.142730
## 473617 0.5237203 -4.878135 4.650727e-06 0.4468383 2.713563
## 37792  3.5178538  4.861294 4.974325e-06 0.4468383 2.665510
## 262670 3.2650169  4.845725 5.292930e-06 0.4468383 2.621155
## 88939  0.3936439  4.830849 5.615854e-06 0.4468383 2.578837
## 279210 3.8673110  4.722155 8.631677e-06 0.4468383 2.271538
# Exclude Sex Chromosomes
samplesheet <- targets
samplesheet$title <- as.character(samplesheet$title)
samplesheet$title <- (sapply(strsplit(samplesheet$title, " "), "[[", 1))
samplesheet$title <- (sapply(strsplit(samplesheet$title, "_"), "[[", 1))
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$title,levels=c("ART","Control"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
    mxs <- Mval_flt[,which( colnames(Mval_flt) %in% samplesheet$Basename )]
    fit.reduced <- lmFit(mxs,design)
    fit.reduced <- eBayes(fit.reduced)
    summary(decideTests(fit.reduced))
##        (Intercept)   sexF groupsControl
## Down        202723   1990             0
## NotSig       18901 466407        468596
## Up          246972    199             0
top_ART_vs_Control_exc <- dm_analysis(samplesheet=samplesheet,
    sex=sex,groups=groups,mx=Mval_flt,name="top_ART_vs_Natural_exc",
    myann=myann ,beta= beta)
Figure 11: Differential analysis: Naturally conceived vs ART excluding sex chromosomes

Figure 11: Differential analysis: Naturally conceived vs ART excluding sex chromosomes

Figure 11: Differential analysis: Naturally conceived vs ART excluding sex chromosomes

Figure 11: Differential analysis: Naturally conceived vs ART excluding sex chromosomes

Figure 11: Differential analysis: Naturally conceived vs ART excluding sex chromosomes

Figure 11: Differential analysis: Naturally conceived vs ART excluding sex chromosomes

Figure 11: Differential analysis: Naturally conceived vs ART excluding sex chromosomes

Figure 11: Differential analysis: Naturally conceived vs ART excluding sex chromosomes

head(top_ART_vs_Control_exc$dma)
##         Row.names UCSC_RefGene_Name Regulatory_Feature_Group      logFC
## 427640 cg25296923                                             0.3538648
## 462648 cg27477884                                            -0.3533839
## 36833  cg01866330           SLC45A4                           0.3440090
## 256565 cg14305701            INPP5A                           0.7627745
## 86764  cg04490037               DDC                           0.2033697
## 272770 cg15240419                                             0.1825872
##          AveExpr         t      P.Value adj.P.Val        B
## 427640 2.1818350  5.028467 2.538491e-06  0.440713 3.183405
## 462648 0.5237203 -4.879111 4.633396e-06  0.440713 2.751332
## 36833  3.5178538  4.862330 4.954640e-06  0.440713 2.703161
## 256565 3.2650169  4.845670 5.294983e-06  0.440713 2.655414
## 86764  0.3936439  4.834363 5.538812e-06  0.440713 2.623055
## 272770 3.8673110  4.726287 8.493875e-06  0.440713 2.315591

in vitro vs in vivo

# Include Sex Chromosomes
samplesheet <- subset(targets,`art treatment:ch1`=="in vivo" | `art treatment:ch1`=="in vitro")
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$`art treatment:ch1`,levels=c("in vivo","in vitro"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
    mxs <- Mval[,which( colnames(Mval) %in% samplesheet$Basename )]
    fit.reduced <- lmFit(mxs,design)
    fit.reduced <- eBayes(fit.reduced)
    summary(decideTests(fit.reduced))
##        (Intercept)   sexF groupsin vitro
## Down        205664  35877             10
## NotSig       26700 430659         479624
## Up          247365  13193             95
top_invivo_vs_invitro_inc <- dm_analysis(samplesheet=samplesheet,
    sex=sex,groups=groups,mx=Mval,name="top_invivo_vs_invitro_inc",
    myann=myann ,beta= beta)
Figure 12: Differential analysis: in vitro vs in vivo including sex chromosomes

Figure 12: Differential analysis: in vitro vs in vivo including sex chromosomes

Figure 12: Differential analysis: in vitro vs in vivo including sex chromosomes

Figure 12: Differential analysis: in vitro vs in vivo including sex chromosomes

Figure 12: Differential analysis: in vitro vs in vivo including sex chromosomes

Figure 12: Differential analysis: in vitro vs in vivo including sex chromosomes

Figure 12: Differential analysis: in vitro vs in vivo including sex chromosomes

Figure 12: Differential analysis: in vitro vs in vivo including sex chromosomes

head(top_invivo_vs_invitro_inc$dma)
##         Row.names               UCSC_RefGene_Name
## 72841  cg03646096                                
## 28936  cg01408508                                
## 446460 cg25860795                       FLI1;FLI1
## 49140  cg02441747 COL25A1;COL25A1;COL25A1;COL25A1
## 276765 cg15084543                     ELTD1;ELTD1
## 115628 cg05937969                            CASR
##               Regulatory_Feature_Group     logFC     AveExpr        t
## 72841                                  1.2416083  2.84813440 7.202227
## 28936                     Unclassified 1.0039389 -0.02762027 6.368744
## 446460             Promoter_Associated 1.3978054 -1.92479009 6.201598
## 49140  Unclassified_Cell_type_specific 0.9461161 -1.83889964 6.140996
## 276765                                 1.4338091 -1.40566774 6.008296
## 115628                                 0.9947499 -1.91755002 5.937261
##             P.Value   adj.P.Val        B
## 72841  4.931350e-09 0.002365712 9.141197
## 28936  8.605989e-08 0.020642712 6.867852
## 446460 1.528173e-07 0.022566761 6.405877
## 49140  1.881626e-07 0.022566761 6.238068
## 276765 2.966493e-07 0.027618011 5.870164
## 115628 3.784140e-07 0.027618011 5.673028
# Exclude Sex Chromosomes
samplesheet <- subset(targets,`art treatment:ch1`=="in vivo" | `art treatment:ch1`=="in vitro")
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$`art treatment:ch1`,levels=c("in vivo","in vitro"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
    mxs <- Mval_flt[,which( colnames(Mval_flt) %in% samplesheet$Basename )]
    fit.reduced <- lmFit(mxs,design)
    fit.reduced <- eBayes(fit.reduced)
    summary(decideTests(fit.reduced))
##        (Intercept)   sexF groupsin vitro
## Down        199815  25742             10
## NotSig       26162 436539         468494
## Up          242619   6315             92
top_invivo_vs_invitro_exc <- dm_analysis(samplesheet=samplesheet,
    sex=sex,groups=groups,mx=Mval_flt,name="top_invivo_vs_invitro_exc",
    myann=myann ,beta= beta)
Figure 13: Differential analysis: in vitro vs in vivo excluding sex chromosomes

Figure 13: Differential analysis: in vitro vs in vivo excluding sex chromosomes

Figure 13: Differential analysis: in vitro vs in vivo excluding sex chromosomes

Figure 13: Differential analysis: in vitro vs in vivo excluding sex chromosomes

Figure 13: Differential analysis: in vitro vs in vivo excluding sex chromosomes

Figure 13: Differential analysis: in vitro vs in vivo excluding sex chromosomes

Figure 13: Differential analysis: in vitro vs in vivo excluding sex chromosomes

Figure 13: Differential analysis: in vitro vs in vivo excluding sex chromosomes

head(top_invivo_vs_invitro_exc$dma)
##         Row.names               UCSC_RefGene_Name
## 71081  cg03646096                                
## 28188  cg01408508                                
## 436140 cg25860795                       FLI1;FLI1
## 47905  cg02441747 COL25A1;COL25A1;COL25A1;COL25A1
## 270368 cg15084543                     ELTD1;ELTD1
## 112763 cg05937969                            CASR
##               Regulatory_Feature_Group     logFC     AveExpr        t
## 71081                                  1.2416083  2.84813440 7.202424
## 28188                     Unclassified 1.0039389 -0.02762027 6.369140
## 436140             Promoter_Associated 1.3978054 -1.92479009 6.201308
## 47905  Unclassified_Cell_type_specific 0.9461161 -1.83889964 6.141440
## 270368                                 1.4338091 -1.40566774 6.007949
## 112763                                 0.9947499 -1.91755002 5.937485
##             P.Value   adj.P.Val        B
## 71081  4.936159e-09 0.002313064 9.133532
## 28188  8.604697e-08 0.020160634 6.862791
## 436140 1.531422e-07 0.022033799 6.399312
## 47905  1.880835e-07 0.022033799 6.233669
## 270368 2.973122e-07 0.027863858 5.863874
## 112763 3.785047e-07 0.029560965 5.668483

Venn diagrams of the differential methylated probes for each contrast

First, we look at the similarity of DMPs altered between in vivo and in vitro.

#v1 <- list("in vivo up" = top_NA_vs_invivo_exc$dm_up , 
#           "in vitro up" = top_NA_vs_invitro_exc$dm_up ,
#           "in vivo dn" = top_NA_vs_invivo_exc$dm_dn ,
#           "in vitro dn" = top_NA_vs_invitro_exc$dm_dn )
#plot(euler(v1, shape = "ellipse"), quantities = TRUE)